package SparkSmallPaper
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.log4j.{Level, Logger}
// Columns: x(0)=buyer id, x(1)=item id, x(2)=item category, x(3)=seller id, x(4)=brand id,
// x(5)=transaction month, x(6)=transaction day, x(7)=buyer action, x(8)=buyer age range,
// x(9)=buyer gender, x(10)=shipping address
// This program finds the top-5 best-selling brands.
object demo06 {
  def main(args: Array[String]): Unit = {
    // Name the app after THIS object (was demo01 — a copy-paste slip from a sibling file).
    val conf = new SparkConf().setMaster("local[2]").setAppName(demo06.getClass.getSimpleName)
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
    val sc = new SparkContext(conf)
    try {
      transformationOps5(sc)
    } finally {
      sc.stop() // always release the context, even if the job throws
    }
  }

  /** Prints the top-5 brand ids ranked by number of purchase actions.
    *
    * CSV columns (0-based): 0=buyer id, 1=item id, 2=item category, 3=seller id,
    * 4=brand id, 5=month, 6=day, 7=buyer action ("2" = purchase), 8=age range,
    * 9=gender, 10=shipping address.
    *
    * @param sc the SparkContext used to read and process the log file
    */
  def transformationOps5(sc: SparkContext): Unit = {
    val lines = sc.textFile("file:///C:/Users/asus/Desktop/hadoop_experiment/data/user_log.csv")
    val fields = lines.map(_.split(","))

    // Keep only purchase records (action code "2") and emit (brandId, 1) pairs.
    // Length guard protects against short/malformed rows before indexing col 7.
    val purchases = fields
      .filter(f => f.length > 7 && f(7) == "2")
      .map(f => (f(4), 1))

    // reduceByKey counts with map-side combining; the original groupByKey
    // shuffled every record per key only to take .size afterwards.
    val counts = purchases.reduceByKey(_ + _)

    // Swap to (count, brandId), sort descending, and keep the top 5.
    val top5 = counts
      .map { case (brand, n) => (n, brand) }
      .sortByKey(ascending = false, numPartitions = 1)
      .take(5)

    top5.foreach { case (n, brand) => println("品牌ID:" + brand + "  出售数量:" + n) }
  }
}